#1. Load Required Packages
required_packages <- c("readxl","nortest")
for (package in required_packages) {
  if (!requireNamespace(package, quietly = TRUE)) {
    install.packages(package)
  }
  library(package, character.only = TRUE)
}
#Description: This section ensures the required packages (readxl, nortest) are installed and loaded.


#2. Import Data Frame
# Read the cohort sheet only when no data frame named `df` is available yet.
# A bare exists("df") cannot be used for this check: it also finds the
# function stats::df (the F density) on the search path, so it is TRUE in a
# default R session and the import would never run. get0(..., mode = "list")
# skips that function (a data frame has mode "list") and returns NULL when
# nothing suitable is found.
if (!is.data.frame(get0("df", mode = "list"))) {
  file_path <- "~/COHORT_Prediction_of_CIP.xlsx"
  # Cells containing the literal text "#N/A" are read as missing values.
  df <- read_excel(file_path, sheet = "cohort", na = "#N/A")
}
#Description: This section imports the "cohort" sheet from an Excel file unless a data frame named df already exists.


#3. Filter Data
# Keep rows whose pet_distance_ici lies in [-1, 365]. Rows with a missing
# pet_distance_ici are dropped: the comparison yields NA for them and which()
# discards NA, exactly as the chained subset() calls did before (the earlier
# "| is.na()" allowance was cancelled by the subsequent ">= -1" subset).
distance_in_window <- with(df, pet_distance_ici >= -1 & pet_distance_ici <= 365)
df_filtered_distance <- df[which(distance_in_window), , drop = FALSE]

# From those rows keep the ones with pet_impossible_spheres_placement equal
# to 0 (rows where this flag is missing are dropped as well).
spheres_placeable <- with(df_filtered_distance, pet_impossible_spheres_placement == 0)
df_filtered <- df_filtered_distance[which(spheres_placeable), , drop = FALSE]
#Description: This section keeps only records where pet_distance_ici is between -1 and 365 (missing values excluded) and pet_impossible_spheres_placement is 0.


#4. Output Text Information
# Overall incidence: number of rows with pneumonitis == 1 and their share
# (in percent, one decimal) of all rows in df_filtered.
n_pneumonitis_cases <- sum(df_filtered$pneumonitis == 1)
n_patients_total <- length(df_filtered$pneumonitis)
pct_pneumonitis_cases <- round(n_pneumonitis_cases / n_patients_total * 100, 1)
print(paste(
  "Following immunotherapy,", n_pneumonitis_cases,
  "patients (", pct_pneumonitis_cases, "%) developed irPneumonitis."
))

#CTCAE
# Count the pneumonitis cases (pneumonitis == 1) for each pneu_grade value
# 1 to 5 and report each count together with its share of all pneumonitis
# cases (percent, one decimal).
has_pneumonitis <- df_filtered$pneumonitis == 1
count_pneumonitis <- sum(has_pneumonitis)
grade_counts <- vapply(
  1:5,
  function(grade) sum(has_pneumonitis & df_filtered$pneu_grade == grade),
  integer(1)
)
# Keep the per-grade counts available under their individual names.
count_grade_1 <- grade_counts[1]
count_grade_2 <- grade_counts[2]
count_grade_3 <- grade_counts[3]
count_grade_4 <- grade_counts[4]
count_grade_5 <- grade_counts[5]
grade_pct <- round(grade_counts / count_pneumonitis * 100, 1)
print(paste(
  "According to CTCAE, there were",
  count_grade_1, "patients (", grade_pct[1], "%)  with grade 1 pneumonitis,",
  count_grade_2, "patients (", grade_pct[2], "%)  with grade 2 pneumonitis,",
  count_grade_3, "patients (", grade_pct[3], "%)  with grade 3 pneumonitis,",
  count_grade_4, "patients (", grade_pct[4], "%)  with grade 4 pneumonitis and",
  count_grade_5, "patients (", grade_pct[5], "%)  who died from respiratory compromise (grade 5)."
))

#median time between the start of immunotherapy and the development of pneumonitis
# Restrict to pneumonitis cases, convert pneu_ici_therapy_begin_distance to
# numeric once, then report its median and its 25% / 75% quantiles.
pneumonitis_rows <- with(df_filtered, which(pneumonitis == 1))
filtered_data <- df_filtered[
  pneumonitis_rows,
  c("no", "pneu_ici_therapy_begin_distance", "pneumonitis"),
  drop = FALSE
]
ici_to_pneumonitis_days <- as.numeric(filtered_data$pneu_ici_therapy_begin_distance)
median_value <- median(ici_to_pneumonitis_days)
ici_quartiles <- quantile(ici_to_pneumonitis_days, c(0.25, 0.75))
first_quartile <- ici_quartiles[1]
third_quartile <- ici_quartiles[2]
print(paste(
  "The median interval between the start of ICI therapy and irPneumonitis was",
  median_value, " days (1QU = ", first_quartile,
  " days; 3QU = ", third_quartile, " days)"
))

#pneumonitis proportion of thoracic radiation
# Compares how often radio_after_pet == 1 occurs among patients with
# (pneumonitis == 1) and without (pneumonitis == 0) pneumonitis, tests the
# association, and derives a relative risk from the cross table.
filtered_data <- subset(df_filtered, pneumonitis == 1, c("no", "radio_after_pet", "pneumonitis"))
filtered_data2 <- subset(df_filtered, pneumonitis == 0, c("no", "radio_after_pet", "pneumonitis"))

# Share (percent, one decimal) of radio_after_pet == 1 within each group.
# nrow() gives the group size. The previous count() call is not defined by
# base R or by the packages loaded in the visible part of this script (it is
# a dplyr verb), so the print below stopped with "could not find function".
pct_radiation_pneumonitis <- round(
  sum(filtered_data$radio_after_pet == 1) / nrow(filtered_data) * 100, 1
)
pct_radiation_no_pneumonitis <- round(
  sum(filtered_data2$radio_after_pet == 1) / nrow(filtered_data2) * 100, 1
)

# Cross table: rows = radio_after_pet, columns = pneumonitis.
tbl <- table(df_filtered$radio_after_pet, df_filtered$pneumonitis)
# Fisher's exact test when any cell count is below 11, chi-squared otherwise.
if (min(tbl) < 11) {
  test <- fisher.test(tbl)
} else {
  test <- chisq.test(tbl)
}
p_value <- test$p.value

# Incidence of pneumonitis per radiation group and their ratio.
# NOTE(review): the [2, ] / [, 2] indexing assumes both variables are coded
# 0/1 with both levels present, so that position 2 is the "1" level - confirm.
incidence_radiation <- tbl[2, 2] / (tbl[2, 2] + tbl[2, 1])
incidence_no_radiation <- tbl[1, 2] / (tbl[1, 2] + tbl[1, 1])
relative_risk <- incidence_radiation / incidence_no_radiation
print(paste(
  "... compared to those without pneumonitis (", pct_radiation_pneumonitis, "% vs.",
  pct_radiation_no_pneumonitis, "%; p =", round(p_value, 3),
  "relative risk = ", round(relative_risk, 3), ")."
))

#median time between the radiation and the development of pneumonitis
# Restrict to pneumonitis cases with radio_thorax == 1, convert
# pneu_radi_distance to numeric once, then report its median and its
# 25% / 75% quantiles.
irradiated_case_rows <- with(df_filtered, which(pneumonitis == 1 & radio_thorax == 1))
filtered_data <- df_filtered[
  irradiated_case_rows,
  c("pneu_radi_distance", "pneumonitis"),
  drop = FALSE
]
radiation_to_pneumonitis_days <- as.numeric(filtered_data$pneu_radi_distance)
median_value <- median(radiation_to_pneumonitis_days)
radiation_quartiles <- quantile(radiation_to_pneumonitis_days, c(0.25, 0.75))
first_quartile <- radiation_quartiles[1]
third_quartile <- radiation_quartiles[2]
print(paste(
  "The median time between irradiation and the development of a pneumonitis was",
  median_value, " days (1QU = ", first_quartile,
  " days; 3QU = ", third_quartile, " days)"
))

#median time between the radiation and PET/CT
# Restrict to rows with radio_before_pet == 1, convert pet_distance_radiation
# to numeric once, then report its median and its 25% / 75% quantiles.
# NOTE(review): the printed sentence speaks of the interval up to "the
# beginning of ICI therapy" while the column used is pet_distance_radiation -
# confirm that wording and variable describe the same interval.
pre_pet_radiation_rows <- with(df_filtered, which(radio_before_pet == 1))
filtered_data <- df_filtered[
  pre_pet_radiation_rows,
  c("pet_distance_radiation", "radio_before_pet"),
  drop = FALSE
]
radiation_to_pet_days <- as.numeric(filtered_data$pet_distance_radiation)
median_value <- median(radiation_to_pet_days)
pet_quartiles <- quantile(radiation_to_pet_days, c(0.25, 0.75))
first_quartile <- pet_quartiles[1]
third_quartile <- pet_quartiles[2]
print(paste(
  "The median time between the lung radiation before the FDG-PET/CT and the beginning of ICI therapy was",
  median_value, " days (1QU = ", first_quartile,
  " days; 3QU = ", third_quartile, " days)"
))

#diabetes and cip
# Cross table (rows = diabetes, columns = pneumonitis), Fisher's exact test,
# and the relative risk of pneumonitis for diabetes row 2 versus row 1.
tbl <- table(df_filtered$diabetes, df_filtered$pneumonitis)
test <- fisher.test(tbl)
p_value <- test$p.value

# Per-row incidence: second column divided by the sum of the first two columns.
# NOTE(review): assumes both variables are coded 0/1 with both levels present,
# so that position 2 is the "1" level - confirm.
incidence_by_diabetes <- tbl[, 2] / (tbl[, 2] + tbl[, 1])
incidence_diabetes <- incidence_by_diabetes[[2]]
incidence_no_diabetes <- incidence_by_diabetes[[1]]
relative_risk <- incidence_diabetes / incidence_no_diabetes

# NOTE(review): the wording "trend towards a lower risk" is fixed text and is
# not derived from relative_risk or p_value - check it against the numbers.
print(paste(
  "Patients with preexisting type II diabetes had a trend towards a lower risk of developing irPneumonitis compared to patients without diabetes (relative risk:",
  round(relative_risk, 3), ", p = ", round(p_value, 3), ")."
))
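#Description: This section prints summary sentences for the results text: the number and percentage of patients with pneumonitis, the CTCAE grade distribution, median intervals with quartiles, and the radiation and diabetes comparisons (p-value and relative risk).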


